{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Achari Berrada Youssef\n",
    "\n",
    "# This script is for training the Text specific Deep Boltzmann Machine. \n",
    "import os\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import config\n",
    "\n",
    "import dbm\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load flickr text\n",
    "dataset = np.load(\"flickr_text_dataset.npy\")\n",
    "D = np.sum(dataset, axis=1)  # length of each document\n",
    "\n",
    "model = dbm.DBM(\n",
    "    main_dir=\"flickr_rbm\", do_pretrain=True, layers=[1024, 1024],\n",
    "    models_dir=config.models_dir, data_dir=config.data_dir, summary_dir=config.summary_dir,\n",
    "    learning_rate=[0.001, 0.001], momentum=0.9, num_epochs=[1, 1], batch_size=[64, 64],\n",
    "    stddev=0.1, verbose=1, gibbs_k=[1, 1], model_name=\"flickr_dbm\",\n",
    "    finetune_learning_rate=0.01, finetune_enc_act_func=[tf.nn.sigmoid, tf.nn.sigmoid],\n",
    "    finetune_dec_act_func=[tf.nn.sigmoid, tf.nn.sigmoid], finetune_num_epochs=1, finetune_batch_size=128,\n",
    "    finetune_opt='momentum', finetune_loss_func=\"mean_squared\", finetune_dropout=0.5, noise=[\"rsm\", \"bin\"],\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Initialize documents lengths\n",
    "for i, _ in enumerate(model.rbms):\n",
    "    model.rbms[i].D = D\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Pretraining phase \n",
    "model.pretrain(dataset[:500], dataset[500:1000])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fit and save the txt-DBM parameters \n",
    "# I put save_dbm_text_params as a quick hack to save the parameters of this dbm as a numpy array\n",
    "model.fit(dataset[:500], dataset[500:1000], dataset[1500:2000], dataset[2000:2500], save_dbm_text_params=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Output : \n",
    "    # text_dbm_layer_1_W.npy\n",
    "    # text_dbm_layer_1_b.npy\n",
    "    # text_dbm_layer_2_W.npy\n",
    "    # text_dbm_layer_2_b.npy"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:py27]",
   "language": "python",
   "name": "conda-env-py27-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
